clear

// =================
// Prep breakthrough data
// =================
// Builds 0/1 "above-median" breakthrough indicators from the KPST similarity
// measures and stores one row per patnum in the tempfile `break'.

use "./input/kpst_breakthrough/KPST_Breakthrough_v201912.dta"

// rfsim* measures: compare against the median over the whole file.
// Stata orders missing values above every number, so an unguarded
// (`v'>=med_`v') would evaluate to 1 whenever `v' is missing. The
// "if !mi()" guard leaves the indicator missing for those patents instead;
// missing indicators contribute nothing to the later collapse (sum).
foreach v of varlist rfsim05 rfsim010 {
	gegen med_`v' = median(`v')
	gen break_p50_`v' = (`v'>=med_`v') if !mi(`v')
}

// rrfsim* measures: compare against the median within each filed_year.
// Same missing-value guard as above (this also covers a year in which every
// score is missing, where the median itself is missing).
foreach v of varlist rrfsim05 rrfsim010 {
	bys filed_year: gegen med_`v' = median(`v')
	gen break_p50_`v' = (`v'>=med_`v') if !mi(`v')
}

*CHECK: collapse (mean) break_p50_rfsim05 break_p50_rfsim010 break_p50_rrfsim05 break_p50_rrfsim010, by(filed_year)

// Only the patent id and the four indicators are needed downstream.
keep patnum break_p50*
compress

tempfile break
save	"`break'", replace

// =================
// Patent Classes, CLS cities 
// =================

// Load the CPC category file and store it in the tempfile `cpc'.
// Forward slashes are used in the path (as everywhere else in this file):
// Stata accepts them on Windows, macOS and Linux, whereas backslashes only
// resolve on Windows.
import 	delimited "./input/CUSP/patents_cpc_categories_from_1900.csv", clear

// Force one row per patnum so the file can serve as the "1" side of the
// m:1 merges below. With force, the first row in file order is retained for
// each patnum and any differing rows for the same patnum are discarded.
duplicates drop patnum,  force

tempfile cpc
save	"`cpc'", replace
clear

// CLS cities: attach CPC categories and breakthrough indicators to the patent
// list, then sum everything to month-year-cls_id cells.

use 	"./output/intermediate/citiescls_patents_list"

// CPC categories. Rows only in the patent list (international patents) are
// kept; rows only in the CPC file (outside date range) are not.
merge	m:1 patnum using "`cpc'", keep(master match) nogenerate

// Ensure merge on cls_id (useful) and removes many irrelevant obs (faster)
// NOTE(review): a missing cls_id compares as larger than 400 and therefore
// survives this drop -- confirm that no such rows exist.
drop if cls_id<400

// Breakthrough indicators. Rows only in the patent list (not in Kelly data)
// are kept; rows only in the breakthrough file (outside date range etc) are not.
merge	m:1 patnum using "`break'", keep(master match) nogenerate

// Weight each indicator by pat_wtd_inv and tag the variable name accordingly.
foreach stem in rfsim05 rfsim010 rrfsim05 rrfsim010 {
	local flag break_p50_`stem'
	replace `flag' = `flag'*pat_wtd_inv
	rename 	`flag' `flag'_wtd_inv
}

// Sum all pat_* and break* variables within month-year-city.
collapse (sum) pat_* break*, by(f_m f_yr cls_id)
compress

sort 	f_yr f_m cls_id

// Monthly date built from year and month; the separate components are then
// no longer needed.
gen 	f_myr = ym(f_yr, f_m)
format 	%tm f_myr
drop 	f_m f_yr
*rename  cls_id master_id

tempfile cls_cities_bt
save 	"`cls_cities_bt'", replace


// =================
// Patent Data, NPI cities
// =================

// NPI cities: attach CPC categories and breakthrough indicators to the patent
// list, then sum everything to month-year-npi_id cells.

use 	"./output/intermediate/cities_patents_list"

// CPC categories. Rows only in the patent list (international patents) are
// kept; rows only in the CPC file (outside date range) are not.
merge	m:1 patnum using "`cpc'", keep(master match) nogenerate

// Breakthrough indicators. Rows only in the patent list (not in Kelly data)
// are kept; rows only in the breakthrough file (outside date range etc) are not.
merge	m:1 patnum using "`break'", keep(master match) nogenerate

// Weight each indicator by pat_wtd_inv and tag the variable name accordingly.
foreach stem in rfsim05 rfsim010 rrfsim05 rrfsim010 {
	local flag break_p50_`stem'
	replace `flag' = `flag'*pat_wtd_inv
	rename 	`flag' `flag'_wtd_inv
}

// Sum all pat_* and break* variables within month-year-city.
collapse (sum) pat_* break*, by(f_m f_yr npi_id)
compress

sort 	f_yr f_m npi_id

// Monthly date built from year and month; the separate components are then
// no longer needed.
gen 	f_myr = ym(f_yr, f_m)
format 	%tm f_myr
drop 	f_m f_yr
*rename  npi_id master_id

// Stack the CLS-city cells under the NPI-city cells in memory. NPI rows have
// npi_id filled and cls_id missing; CLS rows are the other way round.
append using "`cls_cities_bt'"

// Single city identifier: npi_id where available, otherwise cls_id.
gen 	master_id = cond(mi(npi_id), cls_id, npi_id)

// Keep every NPI row, and only the seven listed cities among the CLS rows.
local cls_keep 470, 596, 724, 780, 790, 794, 872
keep if inlist(master_id, `cls_keep') | mi(cls_id)

// Recode ids 43-49 to the matching CLS ids (pairs are positional).
// NOTE(review): if NPI rows with ids 43-49 actually exist, they will share
// master_id with the CLS rows kept above, and xtset below stops on repeated
// time values within a panel -- confirm the id scheme.
local id_from 43 44 45 46 47 48 49
local id_to   470 596 724 780 790 794 872
forvalues i = 1/7 {
	local old : word `i' of `id_from'
	local new : word `i' of `id_to'
	replace master_id = `new' if master_id==`old'
}

drop 	cls_id npi_id

// Declare the city-month panel and add a row for every city in every month
// between the overall first and last month.
xtset master_id f_myr
tsfill, full

// Rows created by tsfill (and any other missing cells) count as zero.
foreach v of varlist pat_* break* {
	replace `v' = 0 if mi(`v')
}

// Restrict to January 1900 through December 1929.
drop if !tin(1900m1, 1929m12)

order master_id f_myr, first

compress 
save 	"./output/intermediate/citiesall_patents_breakthrough", replace

clear
